library(tidyverse)
library(maptools)
library(spdep)

dir.create("../../data/ia_enumerate", showWarnings = FALSE)

i <- as.numeric(Sys.getenv("SLURM_ARRAY_TASK_ID"))

## -------------------
## Load data and clean
## -------------------
ia <- readShapePoly("../../data/ia/county.shp")
votes <- read_csv("../../data/ia/ia_vote.csv")
pops <- read_csv("../../data/ia/ia_pop.csv") %>%
    mutate(County = gsub(" County", "", County))
info <- inner_join(votes, pops) %>%
    rename(county = County,
           trump = `Trump #`,
           clinton = `Clinton #`,
           other = `Others #`,
           total = `Total`,
           fips = `FIPS code[10]`,
           pop = Population) %>%
    select(county, trump, clinton, fips, pop)

## Join with shapefile
ia@data$COUNTY <- as.character(ia@data$COUNTY)
ia@data$COUNTY[ia@data$COUNTY == "Obrien"] <- "O'Brien"
ia@data$id <- 1:nrow(ia@data)
ia@data <- inner_join(ia@data, info, by = c("COUNTY" = "county"))

## ----------------------
## Set up the enumeration
## ----------------------
adjlist <- poly2nb(ia, queen = FALSE)

## Sink
adjlist_map <- c()
for(k in 1:length(adjlist)){
    sub <- adjlist[[k]]
    sub <- sub[sub > k]
    if(length(sub) > 0){
        for(l in 1:length(sub)){
            adjlist_map <- rbind(adjlist_map, c(k, sub[l]))
        }
    }
}
write_delim(data.frame(adjlist_map),
            path = paste0("../../data/ia_enumerate/ia_", i, ".dat"),
            col_names = FALSE)

## Order edges
system(paste0("python ../ndscut.py <../../data/ia_enumerate/ia_", i, ".dat >../../data/ia_enumerate/ia_ordered_", i, ".dat"))

## Run enumpart
system(paste0("../../enumpart_private/enumpart ../../data/ia_enumerate/ia_ordered_", i, ".dat -k 4 -sample 10000000 -comp >/n/imai_lab/bfifield/enumeration/ia_enumerate/ia_sols_", i, ".dat"))

## -----------------------------
## Load in solutions, preprocess
## -----------------------------
sols_all <- readLines(paste0("/n/imai_lab/bfifield/enumeration/ia_enumerate/ia_sols_", i, ".dat"))
sols_all <- apply(do.call("cbind", strsplit(sols_all, " ")), 2, as.numeric)

## Calculate dissimilarity index and parity
seg_truth_sample <- unlist(lapply(1:ncol(sols_all), function(x){
    T <- sum(ia@data$pop)
    P <- sum(ia@data$trump) / T
    t_i <- tapply(ia@data$pop, sols_all[,x], sum)
    p_i <- tapply(ia@data$trump, sols_all[,x], sum) / t_i
    return(sum(t_i * abs(p_i - P) / (2 * T * P * (1 - P)), na.rm = TRUE))
}))

pop_dist_sample <- unlist(lapply(1:ncol(sols_all), function(x){
    distpop <- tapply(ia@data$pop, sols_all[,x], sum)
    parpop <- sum(distpop) / length(distpop)
    return(max(abs(distpop / parpop - 1)))
}))

seg_truth_sample <- data.frame(seg = seg_truth_sample, par = pop_dist_sample)
write_csv(seg_truth_sample, path = paste0("../../data/ia_enumerate/dissimilarity_", i, ".csv"))

